/*
 Read in all files from the corpus folder.

 For every row of data_bible_final, the value of the variable `translation'
 names a tab-delimited UTF-8 text file <pwd>/corpus/<translation>.txt.
 Each file is imported, reduced to two variables
     vid       verse id (column 1, with "#" and blanks removed)
     sentence  verse text (column 2, lower-cased)
 and saved as a Stata dataset next to the text file.
*/

local dir `c(pwd)'

qui {
	/* Load the index dataset once and cache the file names in numbered
	   locals, instead of re-reading the index on every iteration. */
	use	data_bible_final, clear
	local till=_N
	forvalues i=1/`till' {
		local file`i'=translation[`i']
	}

	forvalues i=1/`till' {
		local file "`file`i''"
		noisily di "`file'"
		/* Forward slashes are understood by Stata on every platform and avoid
		   the backslash-before-macro escaping problem.
		   stringcols(_all) keeps every column as a string so that the string
		   functions below do not fail when a column looks numeric (note: any
		   additional columns beyond v2 are therefore stored as strings too). */
		import delimited `"`dir'/corpus/`file'.txt"', delimiter(tab) bindquote(nobind) varnames(nonames) stripquote(no) case(preserve) encoding(UTF-8) stringcols(_all) clear
		/* verse id: strip the "#" marker and any blanks */
		gen vid=v1
		replace vid=usubinstr(vid,"#","",.)
		replace vid=usubinstr(vid," ","",.)
		/* sentence text, converted to lower case.
		   NOTE(review): the local `ISO' is not defined anywhere in the visible
		   script, so it expands to an empty locale string here - confirm
		   whether a specific locale was meant to be supplied. */
		gen sentence=ustrlower(v2,"`ISO'")
		drop v2 v1
		compress
		/* Remove only a literal trailing ".txt"; an unescaped "." would match
		   any character and could delete text in the middle of the name. */
		local save=ustrregexra("`file'","\.txt$","")
		save `"`dir'/corpus/`save'"', replace
	}
}

exit